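/* This program generates the county-by-industry (fips-sic3) innovation index
   from university and hospital patents. Each patent is attributed to the
   counties in the radius around its university, and the index is built as
   cross-sections of patents applied for through 1975, 1980, 1985 and 1990.
*/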




* --- Session setup: no paging, empty workspace, fresh log file ---
set more off
capture log close
clear

log using "[redacted]mk_innovindex_surcnty.log", replace

* --- Project directory globals (every sub-directory global ends in "/") ---
global path "[redacted]"

* Data directories. Note that temp is an alias for the inter directory.
global raw "${path}raw_data/"
global inter "${path}inter/"
global temp "${inter}"
global output "${path}output_data/"

* Code and log directories.
global programs "${path}programs/"
global logs "${path}logs/"

* Output directories; tables and graphs live under out.
global out "${path}out/"
global tables "${out}tables/"
global graphs "${out}graphs/"





******************************
*** DATA on COUNTIES IN RADIUS ***
* Turns the long (university, home county, surrounding county) file into two
* wide files -- one for university systems that appear in a single home county
* and one for systems that appear in several -- with one row per
* (university system, home county) and numbered surrounding-county columns.
use ${inter}county_dists.dta, clear
drop hospname hospnum

* University system 273: discard the "Champaign" records and relabel the
* "Urbana" records as "Urbana-Champaign".
drop if univsysnum==273 & city=="Champaign"
replace city="Urbana-Champaign" if univsysnum==273 & city=="Urbana"

* Flag university systems that show up with more than one home county.
preserve
   contract univsysnum univsysname cont, freq(nrows)
   drop nrows
   * dup = number of additional rows sharing the same univsysnum
   duplicates tag univsysnum, gen(dup)
   tabulate dup, missing
   gen multlocation = dup>0
   drop dup
   sort univsysnum cont
   save ${temp}multlocation_univs.dta, replace
restore

* Bring the flag back onto the long file and keep only the needed columns.
merge m:1 univsysnum cont using ${temp}multlocation_univs.dta, nogenerate
keep univsysnum univsysname city state contstcntyfips surstcntyfips ///
     stabbrev_sur cntyname_sur multloc
drop if univsysnum==317 & city=="Houston" & contstcntyfips==48453

* Pass 1: multi-location systems. Surrounding counties are numbered within
* (university system, home county) in ascending surstcntyfips order and then
* spread into columns surstcntyfips1, surstcntyfips2, ...
preserve
   keep if multloc==1
   bysort univsysnum contstcntyfips (surstcntyfips): gen surcounter = _n
   reshape wide surstcntyfips stabbrev_sur cntyname_sur, ///
      i(univsysname univsysnum city state contstcntyfips) j(surcounter)
   sort univsysnum contstcntyfips
   save ${temp}counties_multloc_univs.dta, replace
restore

* Pass 2: single-location systems, same numbering and reshape. This pass is
* not wrapped in preserve/restore, so the wide single-location data stays in
* memory afterwards.
keep if multloc==0
bysort univsysnum contstcntyfips (surstcntyfips): gen surcounter = _n
reshape wide surstcntyfips stabbrev_sur cntyname_sur, ///
   i(univsysname univsysnum city state contstcntyfips) j(surcounter)
sort univsysnum contstcntyfips
save ${temp}counties_singleloc_univs.dta, replace

*** END DATA on COUNTIES IN RADIUS ***
**************************************

*******************************************************
*** ATTACH SURROUNDING COUNTIES TO PLACE-FIPS-XWALK ***
*******************************************************
* Step 1: stack the single- and multi-location wide files and reduce them to
* one row per home county (contstcntyfips) with its surrounding-county columns.
use ${temp}counties_singleloc_univs.dta, clear
append using ${temp}counties_multloc_univs.dta

contract contstcntyfips surstcntyfips*
drop _freq
count

* Sort key for choosing which row survives when a home county appears with
* more than one set of surrounding counties: the first row in ascending order
* of surstcntyfips1 ... surstcntyfips10 is kept.
* NOTE(review): the list is fixed at 10 columns; if the reshape above ever
* yields a different number of surstcntyfips columns this needs updating.
local surlist
forvalues j = 1/10 {
   local surlist `surlist' surstcntyfips`j'
}
bysort contstcntyfips (`surlist'): keep if _n==1
save ${inter}counties_in_260uni_radius.dta, replace

* Step 2: attach those surrounding counties to the place-to-FIPS crosswalk,
* keeping only places whose county appears in the radius file.
use ${inter}place_fips_xwalk.dta, clear
rename numfips contstcntyfips
merge m:1 contstcntyfips using ${inter}counties_in_260uni_radius.dta, ///
   keep(match) nogenerate
save ${inter}place_fips_sur_xwalk.dta, replace

***************************
*** END PLACE-FIPS XWALK ***
***************************

************************************************
***  PREP TECH CLASS TO INDUSTRY CONCORDANCE ***
************************************************

* Step 1: the set of USPTO classes covered by the concordance (rows with
* sic3==0 excluded). contract keeps only uspto, so the string sic3 built here
* does not reach usptogroups.dta.
use ${raw}uspto-sic3-concordance.dta, clear
drop if sic3==0
rename sic3 numsic3
tostring numsic3, gen(sic3)
replace sic3="0"+sic3 if length(sic3)==2
contract uspto, freq(nrows)
drop nrows
sort uspto
save ${temp}usptogroups.dta, replace

* Step 2: compare the classes used by the patent file with the classes in the
* concordance and keep the classes found on only one side
* (_merge==1: patents only, _merge==2: concordance only).
use ${inter}univandhosppatents.dta, clear
rename nclass_ccl uspto
merge m:1 uspto using ${temp}usptogroups.dta, keep(master using)
contract uspto _merge
drop _freq
count
sort _merge uspto
save ${temp}usptostodrop.dta, replace

* Step 3: split the unmatched classes into one file per side:
* todrop1s.dta (patents only) and todrop2s.dta (concordance only).
foreach side in 1 2 {
   use ${temp}usptostodrop.dta, clear
   keep if _merge==`side'
   drop _merge
   sort uspto
   save ${temp}todrop`side's.dta, replace
}

* Step 4: rebuild the concordance with a zero-padded string sic3, remove the
* classes that never occur in the patent file, and number the remaining
* classes consecutively (sequspto).
use ${raw}uspto-sic3-concordance.dta, clear
drop if sic3==0
rename sic3 numsic3
tostring numsic3, gen(sic3)
replace sic3="0"+sic3 if length(sic3)==2

sort uspto
merge m:1 uspto using ${temp}todrop2s.dta, keep(master using) nogenerate
egen sequspto = group(uspto)

save ${inter}uspto_sic3_forjoinby.dta, replace

*****************************
*** END CONCORDANCE PREP ***
*****************************

********************************************************************************
*** PREP PATENT DATA (TO CREATE *industry* INNOVATION INDEX FROM UNIs and HOSPS)  ***
********************************************************************************
* Produces ${temp}unipatswlocs.dta: patent-level records with a home county
* (contstcntyfips) and its surrounding counties attached.
*
* University systems in this list get their county from the assignee's place
* (Part A); all other systems get it from the single-location county file
* (Part B). The list is defined once so the two parts cannot drift apart.
local placeunivs 60,127,223,254,255,259,260,279,280,297,300,316,317,323

* ---------------------------------------------------------------------------
* Part A: listed university systems, located through assplace.dta and the
* place-to-FIPS crosswalk. Only patents matched in both merges are kept.
* ---------------------------------------------------------------------------
use ${inter}univandhosppatents.dta, clear

rename patent patnum
rename nclass_ccl uspto
rename nclass oldnclass

merge m:1 patnum pdpass using ${temp}assplace.dta
keep if _merge==3 & inlist(univsysnum, `placeunivs')
* NOTE(review): this keep list omits allpats/allcites/allnscites, which the
* index loop later in this file multiplies by the concordance weights. If
* those are patent-level variables of univandhosppatents.dta (not visible
* here), they will be missing for these rows after the append -- confirm.
keep patnum pdpass uspto appyear univsysnum univsysname hospnum hospname place st cntry
rename st statebest
rename cntry countrybest
merge m:1 statebest place using ${inter}place_fips_sur_xwalk.dta, keep(match) nogenerate

sort patnum pdpass
save ${temp}univhosp_extraplaces.dta, replace

* ---------------------------------------------------------------------------
* Part B: all remaining university systems.
* ---------------------------------------------------------------------------
use ${inter}univandhosppatents.dta, clear
rename patent patnum
rename nclass_ccl uspto
rename nclass oldnclass

* Attach the home/surrounding counties of single-location systems. Rows that
* exist only in the county file (systems without any patent) are not kept:
* they would otherwise enter this patent-level file with missing patnum and
* appyear. The year filters in the index loop discard such rows anyway, so
* the final index is unaffected.
merge m:1 univsysnum using ${temp}counties_singleloc_univs.dta, update ///
   keep(master match match_update match_conflict) nogenerate

drop if inlist(univsysnum, `placeunivs')

* Add assignee place information where available; unmatched patents stay in.
* mergeplace is kept because it is dropped by name later in this file.
merge m:1 patnum pdpass using ${temp}assplace.dta, gen(mergeplace) keep(master match)
rename st statebest
rename cntry countrybest
merge m:1 statebest place using ${inter}place_fips_sur_xwalk.dta, keep(master match) nogenerate

* Stack Part A underneath and keep only records that have a home county.
append using ${temp}univhosp_extraplaces.dta

drop if cont==.
capture drop _freq
save ${temp}unipatswlocs.dta, replace

***************************
*** END PATENT DATA PREP ***
***************************



 * Builds one (county fips x sic3) cross-section of the innovation index per
 * cutoff year, using patents with appyear up to and including that year, and
 * saves it as ${inter}indinnovindex_xsection_fipssic3_until<year>_top<rank>.dta.
 foreach year in 1975 1980 1985 1990 {

    * --- University ranking by patent count up to `year' -------------------
    * This does not depend on `rank', so it is built once per year.
    * NOTE(review): "hosp" is resolved by Stata either as an exact variable
    * name or as an abbreviation; which variable it refers to cannot be
    * confirmed from this file.
    use ${temp}unipatswlocs.dta, clear

    keep if appyear<=`year'
    contract univsysname univsysnum hosp
    rename _freq pre`year'_patcount
    gsort -pre`year'_patcount univsysname
    gen pre`year'univpatrank = _n

    * Keep one row per university system: the one with the largest hosp value.
    * NOTE(review): the rank kept is the rank of that row, which need not be
    * the system's best rank when it has several hosp values -- confirm intent.
    gsort univsysnum -hosp
    by univsysnum: drop if _n>1

    sort univsysnum
    save ${temp}pre`year'patrank.dta, replace

    foreach rank in 260 {

       * --- Patents of the top-`rank' systems up to `year' -----------------
       use ${temp}unipatswlocs.dta, clear
       merge m:1 univsysnum using ${temp}pre`year'patrank.dta, keep(match) nogenerate

       * Remove patents whose USPTO class is absent from the concordance.
       * Rows found only in todrop1s.dta carry no rank and would be removed
       * by the rank filter below, so they are not kept here either.
       merge m:1 uspto using ${temp}todrop1s.dta, keep(master) nogenerate

       keep if pre`year'univpatrank<=`rank'
       drop if appyear>`year'

       save ${temp}pre`year'patents_top`rank'.dta, replace

       * --- Spread each patent over industries and surrounding counties ----
       * joinby forms every patent x concordance-row pair within a class.
       joinby uspto using ${inter}uspto_sic3_forjoinby.dta

       drop fips state_fips county_fips county popcounty placestate place mergeplace
       reshape long surstcntyfips stabbrev_sur cntyname_sur, ///
          i(patnum pdpass contstcntyfips sic3 numsic3) j(surcounter)

       * Unused surrounding-county slots come out of the reshape as missing.
       drop if surstcntyfips==.

       * Patent, citation and non-self-citation measures scaled by the
       * concordance weights mfg_freq and use_freq; names carry the year.
       foreach var in mfg_freq use_freq {
          gen p`var'`year'   = allpats*`var'
          gen c`var'`year'   = allcites*`var'
          gen nsc`var'`year' = allnscites*`var'
       }

       * univcont: the county is the university's own county.
       * univsur : constant 1 for every county in the radius.
       gen univcont = (contstcntyfips==surstcntyfips)
       gen univsur = 1

       * Full variable names are spelled out so the collapse does not depend
       * on variable-name abbreviation being enabled (set varabbrev on).
       * Assumes no variable named exactly pmfg_freq, cmfg_freq, ... exists.
       collapse (sum)  pmfg_freq`year' cmfg_freq`year' nscmfg_freq`year' ///
                       puse_freq`year' cuse_freq`year' nscuse_freq`year' ///
                (mean) pre`year'univpatrank univsur                      ///
                (max)  univcont,                                         ///
                by(surstcntyfips sic3)
       drop if pmfg_freq`year'==0 & puse_freq`year'==0

       * County code as a 5-character string; 4-digit codes get a leading 0.
       rename surstcntyfips numfips
       tostring numfips, gen(fips)
       drop if fips=="" | fips=="."
       replace fips = "0"+fips if length(fips)==4

       sort fips sic3

       save ${inter}indinnovindex_xsection_fipssic3_until`year'_top`rank'.dta, replace

    }
 }




* Combine the four cutoff-year cross-sections into one file keyed on
* (fips, sic3), starting from 1975 and adding 1980, 1985 and 1990 in turn.
* NOTE(review): variables that exist under the same name in every
* cross-section (univsur, univcont, numfips) keep the value from the earliest
* file in which the (fips, sic3) cell appears, because a plain merge does not
* overwrite master values -- confirm this is intended.
use ${inter}indinnovindex_xsection_fipssic3_until1975_top260.dta, clear

forvalues yr = 1980(5)1990 {
   merge 1:1 fips sic3 using ${inter}indinnovindex_xsection_fipssic3_until`yr'_top260.dta, nogenerate
}

save ${inter}indinnovindex_xsection_fipssic3.dta, replace



log close

